library("here")
library("sjPlot")
library("dplyr")
library("lme4")
library("viridis")
library("lmerTest")
library("ggplot2")
library("gridExtra")
library("gt")

source("analysis_functions.R")

1. MMRR

1.1 Individual sampling

1.1.1 Summary plots

mmrr_ind <- format_mmrr(here(dirname(getwd()), "p3_methods", "outputs", "mmrr_indsampling_results.csv"))

summary_hplot(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "ratio_err", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_ind, "geo_err", divergent = TRUE)

1.1.2 Model summaries

run_lmer(mmrr_ind, "ratio_ae", table_main = "Ratio Absolute Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 9.87 3.29 3.00 15346 565.93 0.0***
sampstrat 1.36 0.45 3.00 15346 77.68 7.3 × 10−50***
K 0.82 0.82 1.00 15346 140.83 2.4 × 10−32***
m 11.36 11.36 1.00 15346 1.95K 0.0***
phi 0.03 0.03 1.00 15346 4.82 0.028**
H 0.52 0.52 1.00 15346 89.76 3.1 × 10−21***
r 0.49 0.49 1.00 15346 85.09 3.2 × 10−20***
*** p < 0.001
** p < 0.05
run_lmer(mmrr_ind, "ratio_err", table_main = "Ratio Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.06 0.02 3.00 15346 1.62 0.18
sampstrat 8.91 2.97 3.00 15346 229.55 1.1 × 10−145***
K 0.01 0.01 1.00 15346 0.45 0.50
m 0.01 0.01 1.00 15346 0.95 0.33
phi 0.00 0.00 1.00 15346 0.37 0.54
H 0.02 0.02 1.00 15346 1.22 0.27
r 0.22 0.22 1.00 15346 17.12 3.5 × 10−5***
*** p < 0.001
run_lmer(mmrr_ind, "comboenv_err", table_main = "IBE Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.00 0.00 3.00 15346 0.72 0.540
sampstrat 0.72 0.24 3.00 15346 237.68 9.5 × 10−151***
K 0.00 0.00 1.00 15346 4.93 0.026**
m 0.01 0.01 1.00 15346 10.41 1.3 × 10−3*
phi 0.00 0.00 1.00 15346 0.00 1.000
H 0.01 0.01 1.00 15346 5.22 0.022**
r 0.01 0.01 1.00 15346 14.66 1.3 × 10−4***
*** p < 0.001
** p < 0.05
* p < 0.01
run_lmer(mmrr_ind, "geo_err", table_main = "IBD Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.15 0.05 3.00 15346 28.25 3.3 × 10−18***
sampstrat 6.13 2.04 3.00 15346 1.12K 0.0***
K 0.00 0.00 1.00 15346 0.23 0.640
m 0.00 0.00 1.00 15346 0.14 0.710
phi 0.00 0.00 1.00 15346 1.39 0.240
H 0.01 0.01 1.00 15346 3.92 0.048**
r 0.00 0.00 1.00 15346 0.22 0.640
*** p < 0.001
** p < 0.05

1.1.3 Megaplots

MEGAPLOT(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_ind, "ratio_err", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_ind, "geo_err", divergent = TRUE)

1.2 Site sampling

1.2.1 Summary plots

mmrr_site <- format_mmrr(here(dirname(getwd()), "p3_methods", "outputs", "mmrr_sitesampling_results.csv"))

mmrr_site$ratio_ae <- abs(mmrr_site$ratio_err)
summary_hplot(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_site, "ratio_err", divergent = TRUE)

summary_hplot(mmrr_site, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_site, "geo_err", divergent = TRUE)

1.2.2 Model summaries

run_lmer(mmrr_site, "ratio_ae", table_main = "Ratio Absolute Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 12.59 6.29 2.00 8628 441.67 2.4 × 10−183***
sampstrat 1.56 0.78 2.00 8628 54.60 2.7 × 10−24***
K 0.00 0.00 1.00 8628 0.01 0.910
m 0.66 0.66 1.00 8628 46.59 9.4 × 10−12***
phi 0.05 0.05 1.00 8628 3.78 0.052
H 0.40 0.40 1.00 8628 28.06 1.2 × 10−7***
r 0.31 0.31 1.00 8628 22.05 2.7 × 10−6***
*** p < 0.001
run_lmer(mmrr_site, "ratio_err", table_main = "Ratio Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.04 0.02 2.00 8628 0.53 0.590
sampstrat 2.66 1.33 2.00 8628 38.18 3.1 × 10−17***
K 0.14 0.14 1.00 8628 3.90 0.048**
m 0.48 0.48 1.00 8628 13.66 2.2 × 10−4***
phi 0.05 0.05 1.00 8628 1.35 0.250
H 6.42 6.42 1.00 8628 183.82 1.9 × 10−41***
r 0.06 0.06 1.00 8628 1.83 0.180
*** p < 0.001
** p < 0.05
run_lmer(mmrr_site, "comboenv_err", table_main = "IBE Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.00 0.00 2.00 8628 0.02 0.980
sampstrat 0.50 0.25 2.00 8628 51.55 5.5 × 10−23***
K 0.02 0.02 1.00 8628 3.18 0.074
m 0.01 0.01 1.00 8628 1.20 0.270
phi 0.01 0.01 1.00 8628 1.28 0.260
H 0.76 0.76 1.00 8628 157.52 8.1 × 10−36***
r 0.00 0.00 1.00 8628 0.86 0.350
*** p < 0.001
run_lmer(mmrr_site, "geo_err", table_main = "IBD Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 0.03 0.02 2.00 8628 3.34 0.035***
sampstrat 0.34 0.17 2.00 8628 36.25 2.1 × 10−16**
K 3.12 3.12 1.00 8628 657.68 7.5 × 10−140**
m 69.60 69.60 1.00 8628 14.69K 0.0**
phi 0.48 0.48 1.00 8628 100.55 1.5 × 10−23**
H 0.17 0.17 1.00 8628 36.06 2.0 × 10−9**
r 0.01 0.01 1.00 8628 1.59 0.210
*** p < 0.05
** p < 0.001

1.2.3 Megaplots

MEGAPLOT(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_site, "ratio_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "geo_err", divergent = TRUE)

2. GDM

2.1 Individual sampling

2.1.1 Summary plots

gdm_ind <- format_gdm(here(dirname(getwd()), "p3_methods", "outputs", "gdm_indsampling_results.csv"))

summary_hplot(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "ratio_err", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_ind, "geo_err", divergent = TRUE)

2.1.2 Model summaries

run_lmer(gdm_ind, "ratio_ae", table_main = "Ratio Absolute Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 11.69 3.90 3.00 15341 340.65 3.6 × 10−214***
sampstrat 0.59 0.20 3.00 15341 17.31 3.3 × 10−11***
K 1.88 1.88 1.00 15341 164.64 1.7 × 10−37***
m 23.08 23.08 1.00 15341 2.02K 0.0***
phi 0.06 0.06 1.00 15341 4.84 0.028**
H 0.00 0.00 1.00 15341 0.42 0.520
r 0.20 0.20 1.00 15341 17.56 2.8 × 10−5***
*** p < 0.001
** p < 0.05
run_lmer(gdm_ind, "ratio_err", table_main = "Ratio Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 12.34 4.11 3.00 15341 274.37 1.8 × 10−173***
sampstrat 0.62 0.21 3.00 15341 13.86 5.1 × 10−9***
K 1.25 1.25 1.00 15341 83.52 7.1 × 10−20***
m 19.32 19.32 1.00 15341 1.29K 4.5 × 10−271***
phi 0.89 0.89 1.00 15341 59.33 1.4 × 10−14***
H 0.40 0.40 1.00 15341 26.92 2.1 × 10−7***
r 0.23 0.23 1.00 15341 15.22 9.6 × 10−5***
*** p < 0.001
run_lmer(gdm_ind, "comboenv_err", table_main = "IBE Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 7.16 2.39 3.00 15341 541.20 0.0***
sampstrat 0.11 0.04 3.00 15341 8.46 1.3 × 10−5***
K 0.00 0.00 1.00 15341 0.09 0.76
m 0.66 0.66 1.00 15341 148.78 4.6 × 10−34***
phi 0.10 0.10 1.00 15341 22.95 1.7 × 10−6***
H 0.25 0.25 1.00 15341 57.20 4.2 × 10−14***
r 0.12 0.12 1.00 15341 26.44 2.8 × 10−7***
*** p < 0.001
run_lmer(gdm_ind, "geo_err", table_main = "IBD Error")
## boundary (singular) fit: see help('isSingular')
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 106.15 35.38 3.00 15343 653.85 0.0***
sampstrat 32.12 10.71 3.00 15343 197.87 6.5 × 10−126***
K 10.24 10.24 1.00 15343 189.23 8.4 × 10−43***
m 11.90 11.90 1.00 15343 219.96 2.0 × 10−49***
phi 0.14 0.14 1.00 15343 2.55 0.110
H 0.27 0.27 1.00 15343 5.00 0.025**
r 0.15 0.15 1.00 15343 2.79 0.095
*** p < 0.001
** p < 0.05

2.1.3 Megaplots

MEGAPLOT(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_ind, "ratio_err", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_ind, "geo_err", divergent = TRUE)

2.1.4 Prop NA

Confirm that the distribution of NAs is as expected and the proportions are small

MEGAPLOT(gdm_ind, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

2.2 Site sampling

2.2.1 Summary plots

gdm_site <- format_gdm(here(dirname(getwd()), "p3_methods", "outputs", "gdm_sitesampling_results.csv"))

gdm_site$ratio_ae <- abs(gdm_site$ratio_err)
summary_hplot(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_site, "ratio_err", divergent = TRUE)

summary_hplot(gdm_site, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_site, "geo_err", divergent = TRUE)

2.2.2 Model summaries

run_lmer(gdm_site, "ratio_ae", table_main = "Ratio Absolute Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 18.83 9.42 2.00 8342.001 308.59 5.1 × 10−130***
sampstrat 11.50 5.75 2.00 8342.001 188.41 9.3 × 10−81***
K 0.09 0.09 1.00 8342.001 2.90 0.089
m 0.35 0.35 1.00 8342.000 11.52 6.9 × 10−4***
phi 0.00 0.00 1.00 8342.001 0.01 0.930
H 0.00 0.00 1.00 8342.000 0.00 0.960
r 0.16 0.16 1.00 8342.004 5.24 0.022**
*** p < 0.001
** p < 0.05
run_lmer(gdm_site, "ratio_err", table_main = "Ratio Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 19.92 9.96 2.00 8342.001 283.78 5.8 × 10−120***
sampstrat 13.76 6.88 2.00 8342.001 196.00 6.5 × 10−84***
K 0.05 0.05 1.00 8342.001 1.34 0.250
m 0.02 0.02 1.00 8342.000 0.71 0.400
phi 0.21 0.21 1.00 8342.001 5.89 0.015**
H 0.27 0.27 1.00 8342.000 7.79 5.3 × 10−3*
r 0.20 0.20 1.00 8342.004 5.83 0.016**
*** p < 0.001
** p < 0.05
* p < 0.01
run_lmer(gdm_site, "comboenv_err", table_main = "IBE Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 11.64 5.82 2.00 8342.001 309.03 3.4 × 10−130***
sampstrat 1.53 0.76 2.00 8342.001 40.53 3.0 × 10−18***
K 0.19 0.19 1.00 8342.001 10.29 1.3 × 10−3**
m 0.84 0.84 1.00 8342.000 44.77 2.4 × 10−11***
phi 0.06 0.06 1.00 8342.001 3.40 0.065
H 0.04 0.04 1.00 8342.000 2.00 0.160
r 0.13 0.13 1.00 8342.003 6.85 8.9 × 10−3**
*** p < 0.001
** p < 0.01
run_lmer(gdm_site, "geo_err", table_main = "IBD Error")
Predictors Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
nsamp 7.21 3.60 2.00 8342.015 11.60 9.3 × 10−6***
sampstrat 700.45 350.22 2.00 8342.021 1.13K 0.0***
K 363.91 363.91 1.00 8342.020 1.17K 2.6 × 10−240***
m 2.00K 2.00K 1.00 8342.002 6.43K 0.0***
phi 16.23 16.23 1.00 8342.017 52.22 5.4 × 10−13***
H 1.26 1.26 1.00 8342.006 4.05 0.044**
r 1.36 1.36 1.00 8342.053 4.39 0.036**
*** p < 0.001
** p < 0.05

2.2.3 Megaplots

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_err", divergent = TRUE)

MEGAPLOT(gdm_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_site, "geo_err", divergent = TRUE)

2.2.4 Prop NA

Confirm that the distribution of NAs is as expected and the proportions are small

MEGAPLOT(gdm_site, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

3. Compare results of MMRR and GDM

plot(mmrr_ind$geo_coeff, gdm_ind$geo_coeff, col = gdm_ind$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_ind$comboenv_coeff, gdm_ind$comboenv_coeff)

df <- data.frame(mmrr_ind, 
                 geo_mg = gdm_ind$geo_coeff/mmrr_ind$geo_coeff, 
                 env_mg = gdm_ind$comboenv_coeff/mmrr_ind$comboenv_coeff,
                 ratio_mg = gdm_ind$ratio/mmrr_ind$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")

MEGAPLOT(df, "geo_mg")

MEGAPLOT(df, "env_mg")

MEGAPLOT(df, "ratio_mg")

plot(mmrr_site$geo_coeff, gdm_site$geo_coeff, col = gdm_site$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_site$comboenv_coeff, gdm_site$comboenv_coeff)

df <- data.frame(mmrr_site, 
                 geo_mg = gdm_site$geo_coeff/mmrr_site$geo_coeff, 
                 env_mg = gdm_site$comboenv_coeff/mmrr_site$comboenv_coeff,
                 ratio_mg = gdm_site$ratio/mmrr_site$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")